#include "opencl_source_map.hpp" 
namespace MNN { 
#ifndef MNN_OPENCL_BUFFER_CLOSED
/*
 * OpenCL C program text containing two element-wise kernels that operate on
 * __global FLOAT buffers, eight elements (one FLOAT8) at a time:
 *
 *   - binary_cfunction_buf : reads four FLOAT8 tiles from input0 plus one
 *                            from input1, combines them by additions and
 *                            writes three tiles to output.
 *   - binary_function_buf  : out = OPERATOR applied to in0 / in1, for up to
 *                            four rows per work item (controlled by VEC_H).
 *
 * Names used but NOT defined inside this string:
 *   FLOAT, FLOAT8, VEC_H  -- presumably supplied by the host as program
 *                            build options; TODO confirm against the caller.
 *   MNN_SUPPORT_FP16      -- when defined, the cl_khr_fp16 extension pragma
 *                            is emitted.
 *   OPERATOR              -- optional; falls back to `in0+in1` when the
 *                            builder does not define it.
 *
 * NOTE(review): the string looks machine-generated from a .cl file; if so,
 * edits belong in that source rather than here -- confirm.
 * NOTE(review): the `//` comments inside the string are part of the program
 * text handed to the OpenCL compiler. Some of them look stale:
 *   - `width` / `height` of binary_cfunction_buf are annotated with
 *     "[offsetA,...]" / "[strideA,...]" although they are scalar ints.
 *   - "[X/16,Y]" is written next to code that steps pos.x by 8 elements.
 */
const char* strassen_binary_buf = 
"#ifdef MNN_SUPPORT_FP16\n"
"#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
"#endif\n"
/*
 * Kernel 1: binary_cfunction_buf
 *
 * Work item (x, y) is active only when x < global_dim0 and y < global_dim1.
 * Element offsets used (all relative to the buffer start):
 *   offset_11 = offsetC + x*8 + y*strideC
 *   offset_12 = offset_11 + width
 *   offset_21 = offset_11 + strideC*height
 *   offset_22 = offset_21 + width
 *   cx        = x*8 + y*width            (into input1)
 *
 * With the ORIGINAL loaded values written as i11, i12, i21, i22, cx, the
 * stores are equivalent to:
 *   output[offset_21] = i12 + cx + i21
 *   output[offset_22] = i22 + i21 + i12 + cx
 *   output[offset_12] = i11 + i22 + i12 + cx
 * The statement order matters because in_12 / in_21 are reused as
 * accumulators. Nothing is stored at offset_11.
 *
 * NOTE(review): judging by the variable name this is presumably the
 * recombination step of a Strassen-style matmul -- confirm with host code.
 */
"__kernel void binary_cfunction_buf(__private int global_dim0,__private int global_dim1,\n"
" __global FLOAT* input0,\n"
" __private const int offsetC,\n"
" __private const int strideC,\n"
" __global FLOAT* input1,__global FLOAT* output,\n"
" __private const int width,//[offsetA,offsetB,offsetC,0]\n"
" __private const int height//[strideA,strideB,strideC,0]\n"
") {\n"
" int2 pos=(int2)(get_global_id(0),get_global_id(1));// [X/16,Y]\n"
" \n"
" if (pos.x<global_dim0 && pos.y<global_dim1) {\n"
" int offset_11=offsetC+pos.x*8+pos.y*strideC;\n"
" int offset_12=offset_11+width;\n"
" int offset_21=offset_11+strideC*height;\n"
" int offset_22=offset_21+width;\n"
" FLOAT8 in_11=vload8(0,input0+offset_11);\n"
" FLOAT8 in_12=vload8(0,input0+offset_12);\n"
" FLOAT8 in_21=vload8(0,input0+offset_21);\n"
" FLOAT8 in_22=vload8(0,input0+offset_22);\n"
" FLOAT8 in_cx=vload8(0,input1+pos.x*8+pos.y*width);\n"
" in_12=in_12+in_cx;\n"
" in_21=in_12+in_21;\n"
" in_12=in_22+in_12;\n"
" in_22=in_22+in_21;\n"
" in_12=in_11+in_12;\n"
" vstore8(in_21,0,output+offset_21);\n"
" vstore8(in_22,0,output+offset_22);\n"
" vstore8(in_12,0,output+offset_12);\n"
" }\n"
"}\n"
/* Default binary operation used by binary_function_buf: element-wise add. */
"#ifndef OPERATOR\n"
"#define OPERATOR in0+in1\n"
"#endif\n"
/*
 * Kernel 2: binary_function_buf
 *
 * baseOffsets.{x,y,z} are the element offsets into input0 / input1 / output,
 * strides.{x,y,z} the matching row strides (the .w components are unused).
 * Work item (x, y), active when x < global_dim0 and y < global_dim1, starts
 * at column x*8 of row y*VEC_H and evaluates OPERATOR on one FLOAT8 per row:
 *   - row 0 always,
 *   - row 1 when VEC_H >= 2,
 *   - rows 2 and 3 only when VEC_H == 4.
 * OPERATOR must be an expression over the locals `in0` and `in1`.
 *
 * NOTE(review): only VEC_H values 1, 2 and 4 are fully covered; e.g. VEC_H
 * of 3 would advance three rows per work item but process only two.
 */
"__kernel void binary_function_buf(__private int global_dim0,__private int global_dim1,\n"
" __global FLOAT* input0,__global FLOAT* input1,__global FLOAT* output,\n"
" __private const int4 baseOffsets,//[offsetA,offsetB,offsetC,0]\n"
" __private const int4 strides//[strideA,strideB,strideC,0]\n"
") {\n"
" int2 pos=(int2)(get_global_id(0),get_global_id(1));// [X/16,Y]\n"
" \n"
" if (pos.x<global_dim0 && pos.y<global_dim1) {\n"
" const int baseOffsetA=baseOffsets.x;\n"
" const int baseOffsetB=baseOffsets.y;\n"
" const int baseOffsetC=baseOffsets.z;\n"
" const int strideA=strides.x;\n"
" const int strideB=strides.y;\n"
" const int strideC=strides.z;\n"
" \n"
" \n"
" int offsetA=pos.x*8+pos.y*VEC_H*strideA+baseOffsetA;\n"
" int offsetB=pos.x*8+pos.y*VEC_H*strideB+baseOffsetB;\n"
" int offsetC=pos.x*8+pos.y*VEC_H*strideC+baseOffsetC;\n"
/* Row 0 of this work item's group of rows. */
" {\n"
" FLOAT8 in0=vload8(0,input0+offsetA);\n"
" FLOAT8 in1=vload8(0,input1+offsetB);\n"
" FLOAT8 out=OPERATOR;\n"
" vstore8(out,0,output+offsetC);\n"
" }\n"
/* Row 1: each following section first advances all three offsets by one row. */
" #if VEC_H >= 2\n"
" {\n"
" offsetA += strideA;\n"
" offsetB += strideB;\n"
" offsetC += strideC;\n"
" FLOAT8 in0=vload8(0,input0+offsetA);\n"
" FLOAT8 in1=vload8(0,input1+offsetB);\n"
" FLOAT8 out=OPERATOR;\n"
" vstore8(out,0,output+offsetC);\n"
" }\n"
" #endif\n"
/* Rows 2 and 3: compiled in only for VEC_H exactly equal to 4. */
" #if VEC_H == 4\n"
" {\n"
" offsetA += strideA;\n"
" offsetB += strideB;\n"
" offsetC += strideC;\n"
" FLOAT8 in0=vload8(0,input0+offsetA);\n"
" FLOAT8 in1=vload8(0,input1+offsetB);\n"
" FLOAT8 out=OPERATOR;\n"
" vstore8(out,0,output+offsetC);\n"
" }\n"
" {\n"
" offsetA += strideA;\n"
" offsetB += strideB;\n"
" offsetC += strideC;\n"
" FLOAT8 in0=vload8(0,input0+offsetA);\n"
" FLOAT8 in1=vload8(0,input1+offsetB);\n"
" FLOAT8 out=OPERATOR;\n"
" vstore8(out,0,output+offsetC);\n"
" }\n"
" #endif\n"
" }\n"
"}\n"
;
#endif
}
